This notebook serves as a codebank for plotly charts.
# SAP Colors
from IPython.display import Image
# Build an IPython Image for the palette picture at ../docs/sap_colors.png.
# NOTE(review): presumably shown as the cell output in the notebook; as a plain script this line displays nothing.
Image(filename='../docs/sap_colors.png')
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import random
# Seed NumPy's global RNG so the np.random.rand() calls used further down are repeatable.
np.random.seed(1)
import plotly.express as px
# Tips demo dataset from plotly express; `df` is the frame the tip charts below read
# (columns used in this file: 'sex', 'smoker', 'tip', 'day').
df = px.data.tips()
import plotly.io as pio
# png_renderer = pio.renderers["png"]
# Default renderer for every fig.show() call that does not pass renderer= itself.
pio.renderers.default = "notebook"
# NOTE(review): presumably the plotly version this codebank was written against - confirm.
#plotly==4.2.1
# Headline F1 pairs (two values each).
cv_f1 = [0.94, 0.89]
test_f1 = [0.58, 0.88]

# F1 score series, twelve values per list.  The five *_cv_f1_yes lists feed the
# "F1-Yes by Modelling Approach" box plot further down.
fasttext_cnn_cv_f1_yes = [
    0.8254545454545454, 0.8493647912885662, 0.876611418047882,
    0.9114391143911438, 0.9205776173285197, 0.9046728971962616,
    0.8897058823529411, 0.908411214953271, 0.8847583643122676,
    0.9150090415913201, 0.8959107806691449, 0.8800000000000001,
]
fasttext_rnn_cv_f1_yes = [
    0.823747680890538, 0.8683274021352313, 0.8624535315985131,
    0.8781362007168458, 0.9027522935779816, 0.884476534296029,
    0.8897058823529411, 0.8962962962962961, 0.8897058823529411,
    0.9087591240875912, 0.8929889298892989, 0.8929889298892989,
]
flair_cnn_cv_f1_yes = [
    0.823747680890538, 0.8734402852049911, 0.8390596745027126,
    0.866785079928952, 0.896551724137931, 0.8782287822878228,
    0.8530465949820789, 0.8940754039497307, 0.8856624319419237,
    0.901669758812616, 0.8876611418047881, 0.88,
]
elmo_small_cv_f1_yes = [
    0.7943262411347518, 0.8729874776386405, 0.8655616942909761,
    0.8954128440366974, 0.9199999999999999, 0.8909090909090909,
    0.8913443830570903, 0.9114391143911438, 0.8947368421052632,
    0.9038112522686025, 0.9117647058823529, 0.8917910447761194,
]
bert_cv_f1_yes = [
    0.8318264014466548, 0.7951807228915663, 0.8698884758364313,
    0.867992766726944, 0.9068541300527241, 0.874296435272045,
    0.8908765652951699, 0.9174311926605504, 0.9018181818181819,
    0.9227941176470589, 0.90625, 0.8991150442477875,
]
final_extension_policy_test_f1_yes = [
    0.6923076923076923, 0.9600000000000001, 0.9230769230769231,
    1.0, 0.8695652173913043, 0.9565217391304348,
    0.9230769230769231, 1.0, 1.0,
    0.9600000000000001, 1.0, 1.0,
]
# Nested count table used by the Sankey diagram at the end of the file:
# outer key -> {inner key -> count}.  Every inner dict carries the same twelve
# keys in the same order, so each row below is just the twelve counts zipped
# onto that shared key tuple.
first_type_dict = {
    outer_key: dict(zip(
        (
            'orderform', 'trialorderform', 'addendum', 'amendment',
            'addorderform', 'appendix', 'termination', 'purchaseorder',
            'renewalorderform', 'others', 'supplementaltnc', 'changerequest',
        ),
        row_counts,
    ))
    for outer_key, row_counts in (
        ('ContractDocuments',            (125, 10, 7, 36, 7, 1, 2, 6, 1, 1, 0, 1)),
        ('Orderform',                    (90, 7, 3, 21, 22, 0, 2, 28, 2, 0, 1, 0)),
        ('Mastercontract',               (63, 1, 1, 0, 18, 0, 0, 0, 0, 2, 0, 0)),
        ('Terms&Conditions(allformats)', (0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0)),
        ('MasterContract',               (4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
        ('Amendments',                   (0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)),
    )
}
# Horizontal box plots of tip amount, one box per (sex, smoker) group of the tips frame.
df.head()

female = df['sex'] == 'Female'
male = df['sex'] == 'Male'
smokes = df['smoker'] == 'Yes'
no_smoke = df['smoker'] == 'No'

x0 = df[female & no_smoke]['tip'].values
x1 = df[female & smokes]['tip'].values
x2 = df[male & no_smoke]['tip'].values
x3 = df[male & smokes]['tip'].values

# (data, trace name, marker colour).  The trace names now describe the filter
# that produced each array: the smoker / non-smoker labels were previously
# attached to the opposite groups.
box_specs = [
    (x0, 'Female (non-smoker)', '#ffaf00'),
    (x1, 'Female (smoker)', '#e35500'),
    (x2, 'Male (non-smoker)', '#cccccc'),
    (x3, 'Male (smoker)', '#999999'),
]

fig = go.Figure()
for box_tips, box_name, box_color in box_specs:
    fig.add_trace(go.Box(
        x=box_tips,
        name=box_name,
        marker=dict(color=box_color),
        boxmean=True,  # draws the mean only; boxmean='sd' would add the standard deviation
        jitter=0.3,
        pointpos=-1.8,
        boxpoints='all',  # plot every observation next to its box
    ))
fig.update_traces(orientation='h')
fig.update_layout(
    title='Tip Amount (Female vs. Male) <b><br>Among smokers and non-smokers<br></b>',
    xaxis=dict(
        title='Tip',
        range=[-1, 11],
    ),
    yaxis=dict(),
    margin=dict(l=60, r=30, b=80, t=100),
    autosize=False,
    width=800,
    height=500,
    showlegend=False,
    template='plotly_white',
)
fig.show(renderer="notebook")
# One vertical box per modelling approach, built from the *_cv_f1_yes score lists.
# Each entry: (trace name, marker colour, scores).
data = [
    go.Box(y=scores, name=model_name, text=scores, marker={'color': box_color})
    for model_name, box_color, scores in (
        ('fastText + cnn', '#4ac6b7', fasttext_cnn_cv_f1_yes),
        ('fastText + rnn', '#4f5e7f', fasttext_rnn_cv_f1_yes),
        ('flair + cnn', '#965f8a', flair_cnn_cv_f1_yes),
        ('elmo + cnn', '#ff7070', elmo_small_cv_f1_yes),
        ('bert + cnn', '#f8aa27', bert_cv_f1_yes),
    )
]
trace0, trace1, trace2, trace3, trace4 = data

fig = go.Figure(data=data)
fig.update_layout(
    title='F1-Yes by Modelling Approach<b><br>Repeated 6-fold CV</b>',
    xaxis={'title': 'Models'},
    yaxis={'title': 'CV F1-Yes'},
    margin={'l': 60, 'r': 30, 'b': 80, 't': 100},
    autosize=False,
    width=800,
    height=500,
    showlegend=False,
    template='plotly_white',
)
fig.show()
# Half-violin ("ridgeline" style) plot of tip amount per (sex, smoker) group.
female = df['sex'] == 'Female'
male = df['sex'] == 'Male'
smokes = df['smoker'] == 'Yes'
no_smoke = df['smoker'] == 'No'

x0 = df[female & no_smoke]['tip'].values
x1 = df[female & smokes]['tip'].values
x2 = df[male & no_smoke]['tip'].values
x3 = df[male & smokes]['tip'].values

data = [x0, x1, x2, x3]
colors = ['#ffaf00', '#e35500', '#cccccc', '#999999']
# Names follow the order of `data`; the smoker / non-smoker labels were
# previously attached to the opposite groups.
names = ['Female (non-smoker)', 'Female (smoker)', 'Male (non-smoker)', 'Male (smoker)']

fig = go.Figure()
for violin_tips, violin_color, violin_name in zip(data, colors, names):
    fig.add_trace(go.Violin(x=violin_tips, line_color=violin_color, name=violin_name))

fig.update_traces(
    orientation='h',
    side='positive',
    width=3,
    points=False,           # switch to points='all' to overlay every observation
    meanline_visible=True,
    jitter=0.05,            # only matters once points are shown
    scalemode='count',      # scale each violin's area with its number of observations
)
fig.update_layout(
    title='Tip Amount (Female vs. Male) <b><br>Among smokers and non-smokers<br></b>',
    xaxis=dict(title='Tip'),
    yaxis=dict(),
    margin=dict(l=60, r=30, b=80, t=100),
    autosize=False,
    width=800,
    height=500,
    showlegend=True,
    template='plotly_white',
)
fig.show()
The 'categoryorder' axis property is an enumeration that may be specified as
one of the following values:
['trace', 'category ascending', 'category descending', 'array',
'total ascending', 'total descending',
'min ascending', 'min descending',
'max ascending', 'max descending',
'sum ascending', 'sum descending',
'mean ascending', 'mean descending',
'median ascending', 'median descending']
# Vertical bar chart of the summed tips per day.
# Aggregate once and reuse the result for bar positions, heights and labels
# (the same groupby/sum was previously evaluated three times).
tips_by_day = df.groupby('day')['tip'].sum()

fig = go.Figure()
fig.add_trace(go.Bar(
    x=tips_by_day.index,
    y=tips_by_day.values,
    text=tips_by_day.values,
    name='Primary Product',
    marker_color='#eb7300',
))
fig.update_layout(barmode='group')
# Print each bar's value with two decimals, outside the bar.
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(
    title='Total Tips Received <b><br>Daily</b>',
    xaxis=dict(
        tickangle=-45,                     # rotated category labels
        categoryorder='total descending',  # order the days by bar total
    ),
    yaxis=dict(
        showticklabels=False,
        title='Tips Amount',
    ),
    margin=dict(l=60, r=30, b=80, t=80),
    autosize=False,
    width=800,
    height=500,
    showlegend=False,
    template='plotly_white',
)
fig.show()
# Horizontal variant of the daily tips bar chart.
# Aggregate once and reuse the result for bar lengths, categories and labels
# (the same groupby/sum was previously evaluated three times).
tips_by_day = df.groupby('day')['tip'].sum()

fig = go.Figure()
fig.add_trace(go.Bar(
    x=tips_by_day.values,
    y=tips_by_day.index,
    text=tips_by_day.values,
    name='Primary Product',
    marker_color='#eb7300',
    orientation='h',
))
fig.update_layout(barmode='stack')
# Print each bar's value with two decimals, outside the bar.
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(
    title='Total Tips Received <b><br>Daily</b>',
    xaxis=dict(title='Tips Amount'),
    yaxis=dict(
        showticklabels=True,
        categoryorder='total descending',  # order the days by bar total
    ),
    margin=dict(l=60, r=30, b=80, t=80),
    autosize=False,
    width=800,
    height=500,
    showlegend=False,
    template='plotly_white',
)
fig.show()
# Grouped bar chart comparing two score series per data field.
labels = ['Extension Policy', 'Analyses']
names = ['CV F1-Yes', 'Test F1-Yes']
x = labels
# Reuse the score lists defined with the other data at the top of the file
# instead of repeating the same numbers here.
y = cv_f1
y2 = test_f1

# (legend name, bar heights, fill colour) for each series.
trace1, trace2 = (
    go.Bar(
        x=x,
        y=scores,
        text=scores,
        texttemplate='%{text:.2f}',  # value label with two decimals
        textposition='outside',
        name=series_name,
        marker=dict(
            color=fill_color,
            line=dict(color='rgb(8,48,107)', width=1),  # bar outline
        ),
        opacity=0.6,
    )
    for series_name, scores, fill_color in (
        (names[0], y, '#f9a828'),
        (names[1], y2, '#07617d'),
    )
)

fig = go.Figure(data=[trace1, trace2])
fig.update_layout(
    barmode='group',
    bargap=0.15,      # gap between bars of adjacent location coordinates
    bargroupgap=0.1,  # gap between bars of the same location coordinate
    title='F1-Yes <br><b>5-fold CV VS. Test</b>',
    xaxis=dict(title='Data Fields'),
    # The y axis is numeric, so no categoryorder is set on it.
    yaxis=dict(
        title='f1-score (Yes)',
        showticklabels=True,
    ),
    margin=dict(l=60, r=30, b=80, t=80),
    autosize=False,
    width=800,
    height=500,
    showlegend=False,
    template='plotly_white',
)
fig.show()
# Fabricate yes/no survey columns on the tips frame with np.random, then plot
# the answer counts per column as a stacked bar chart.
# Each pair is (new column, cutoff): a row gets 'Yes' when its random draw
# exceeds the cutoff.  Columns are generated in this order, one draw per row.
yes_cutoffs = [
    ('is_married', .3),
    ('is_employed', .1),
    ('is_senior', .9),
    ('is_child', .9),
    ('is_regular', .5),
    ('is_member', .8),
    ('is_big_spender', .7),
    ('is_lives_near', .8),
]
for flag_col, cutoff in yes_cutoffs:
    df[flag_col] = df.apply(lambda row: 'Yes' if np.random.rand() > cutoff else 'No', axis=1)
# Second pass over is_lives_near: keep the answer when the draw exceeds .3, else 'Unknown'.
df['is_lives_near'] = df['is_lives_near'].apply(
    lambda answer: answer if np.random.rand() > .3 else 'Unknown')

cols_list = ['smoker', 'is_married', 'is_employed', 'is_senior', 'is_child', 'is_regular',
             'is_member', 'is_big_spender', 'is_lives_near']

# Row counts per answer, one entry per column in cols_list.
no_list = [df[df[col] == 'No'].shape[0] for col in cols_list]
yes_list = [df[df[col] == 'Yes'].shape[0] for col in cols_list]
unknown_list = [df[df[col] == 'Unknown'].shape[0] for col in cols_list]

n_rows = df.shape[0]
x = cols_list
y, y2, y3 = no_list, yes_list, unknown_list
# Bar labels: share of all rows, as a percentage rounded to two decimals.
y_text = [f'{round(count / n_rows * 100, 2)}%' for count in y]
y2_text = [f'{round(count / n_rows * 100, 2)}%' for count in y2]
y3_text = [f'{round(count / n_rows * 100, 2)}%' for count in y3]

# (legend name, counts, labels, fill colour) for each stacked segment.
trace1, trace2, trace3 = (
    go.Bar(
        x=x,
        y=counts,
        text=shares,
        textposition='auto',
        name=answer,
        marker=dict(
            color=fill_color,
            line=dict(color='rgb(8,48,107)', width=1.5),
        ),
        opacity=0.6,
    )
    for answer, counts, shares, fill_color in (
        ('No', y, y_text, '#3ec1d3'),
        ('Yes', y2, y2_text, '#f6f7d7'),
        ('Unknown', y3, y3_text, '#ff9a00'),
    )
)
data = [trace1, trace2, trace3]

layout = go.Layout(
    title="Customers' Survey Data <br><b>from 01 Jan to 07 Jan</b>",
    xaxis={'tickangle': 45},
    yaxis={'title': 'count'},
    margin={'l': 60, 'r': 30, 'b': 80, 't': 80},
    autosize=False,
    width=800,
    height=500,
    showlegend=True,
    template='plotly_white',
    barmode='stack',
)
fig = go.Figure(data=data, layout=layout)
fig.show()
# Donut chart with two slices (below / above threshold).
colors = ['orange', 'lightblue']
donut = go.Pie(
    labels=['Below Threshold', 'Above Threshold'],
    values=[80, 2821],
    rotation=30,
    hole=.4,            # size of the hole in the middle
    pull=[0, 0.05],     # pull distance of each slice
    hoverinfo='label+percent',
    textinfo='value+label+percent',
    textposition='outside',
    textfont=dict(size=12),
    marker=dict(
        colors=colors,
        line=dict(color='#000000', width=0.1),
    ),
)
fig = go.Figure(data=[donut])
fig.update_layout(
    title='On-Premise OCR Scores (Threshold 0.81)',
    margin={'l': 60, 'r': 30, 'b': 80, 't': 100},
    autosize=False,
    width=800,
    height=500,
    showlegend=True,
    template='plotly_white',
)
fig.show()
# Line chart of four metric columns against the 'threshold' column of the CSV.
df_automation_rate = pd.read_csv('../data/df_automation_rate_ar.csv', index_col=[0])
df_automation_rate.head(2)

thresholds = df_automation_rate['threshold'].values
# One line per (CSV column, legend name).
data = [
    go.Scatter(
        x=thresholds,
        y=df_automation_rate[metric_col].values,
        mode='lines',
        name=legend_name,
    )
    for metric_col, legend_name in (
        ('automation_rate', 'Automated Ratio'),
        ('f1_yes', "F1 'Yes'"),
        ('precision_yes', "Precision 'Yes'"),
        ('recall_yes', "Recall 'Yes'"),
    )
]
trace1, trace2, trace3, trace4 = data

# Start from a fresh figure.  Passing the `layout` object left over from the
# survey bar chart would carry its settings (45-degree x tick labels,
# barmode='stack') into this chart, because update_layout merges into the
# existing layout instead of replacing it, and would make this cell depend on
# that earlier cell having been run.
fig = go.Figure(data=data)
fig.update_layout(
    title='Threshold Analysis<br><b>EN Cloud Audit Rights</b>',
    xaxis=dict(
        title='Threshold probability',
        autorange=False,
        fixedrange=True,
        gridcolor="rgb(204, 204, 204)",
        range=[0, 1],
        showline=True,
        showticklabels=True,
        ticks="outside",
    ),
    yaxis=dict(
        title='F1-Score (Yes)',
        autorange=False,
        fixedrange=True,
        gridcolor="rgb(204, 204, 204)",
        range=[0, 1],
        showline=True,
        showticklabels=True,
        ticks="outside",
    ),
    margin=dict(l=60, r=30, b=80, t=80),
    autosize=False,
    width=800,
    height=500,
    showlegend=True,
    legend=dict(  # horizontal legend placed below the plot area
        orientation="h",
        yanchor="bottom",
        y=-0.4,
        xanchor="right",
        x=1,
    ),
    template='plotly_white',
)
fig.show()
# Sankey diagram built from first_type_dict: outer keys are the left-hand
# nodes, inner keys the right-hand nodes, and each count is a link value.

# sankey left unique names
list(first_type_dict.keys())
# sankey right unique names (taken from the first inner dict)
list(next(iter(first_type_dict.values()), {}))

unique_left = list(first_type_dict)
unique_right = list(next(iter(first_type_dict.values()), {}))
# Node order: all left nodes first, then all right nodes; link source/target
# below are indices into this list.
labels = unique_left + unique_right
colors = ['#ffaf00'] * len(unique_left) + ["#BCDc50"] * len(unique_right)

# One link per (left, right) pair.  Values are looked up by key, so the result
# does not depend on every inner dict listing its keys in the same order; a
# pair missing from an inner dict counts as 0.
link_source, link_target, flat_list = [], [], []
for left_idx, left_name in enumerate(unique_left):
    for right_offset, right_name in enumerate(unique_right):
        link_source.append(left_idx)
        link_target.append(len(unique_left) + right_offset)
        flat_list.append(first_type_dict[left_name].get(right_name, 0))

fig = go.Figure(data=[go.Sankey(
    valueformat=".0f",
    valuesuffix=" docs",
    # Define nodes
    node=dict(
        pad=15,
        thickness=15,
        line=dict(color="black", width=0.5),
        label=labels,
        color=colors,
    ),
    # Add links.  No link labels are passed: the node label list has one entry
    # per node, not one per link, so using it here named the first links after
    # unrelated nodes.
    link=dict(
        source=link_source,
        target=link_target,
        value=flat_list,
        color="rgba(4, 1, 1, 0.15)",
    ),
)])
fig.update_layout(
    title_text="<b>Document Type in Filename (left) VS. Actual Document Type (right)</b><br>Based on 468 manually reviewed documents",
    font_size=10,
    margin=dict(l=60, r=30, b=80, t=100),
    autosize=False,
    width=800,
    height=500,
    showlegend=False,
    template='plotly_white',
)
fig.show()
# Save the notebook before running this cell: the line below is an IPython `!`
# shell escape (not plain Python) that runs `jupyter nbconvert` on the file
# plotly_codebank_4.2.1_041120.ipynb with the `toc2` template.
!jupyter nbconvert plotly_codebank_4.2.1_041120.ipynb --template toc2